library(data.table)
library(ggplot2)
library(ggpubr)
library(ggpointdensity)
library(survival)
library(survminer)
library(readxl)
library(stargazer)
library(ggcorrplot)
library(tidyr)
theme_set(theme_bw())Load the results of clinical information from TCGA pan cancer study.
cancer_types = c("COAD", "LUAD", "LUSC", "SKCM")
cdr = read_excel("clinical_data/TCGA-CDR-SupplementalTableS1.xlsx",
sheet = "TCGA-CDR")
dim(cdr)## [1] 11160 34
## # A tibble: 2 x 34
## ...1 bcr_patient_bar… type age_at_initial_… gender race ajcc_pathologic…
## <chr> <chr> <chr> <dbl> <chr> <chr> <chr>
## 1 1 TCGA-OR-A5J1 ACC 58 MALE WHITE Stage II
## 2 2 TCGA-OR-A5J2 ACC 44 FEMALE WHITE Stage IV
## # … with 27 more variables: clinical_stage <chr>, histological_type <chr>,
## # histological_grade <chr>, initial_pathologic_dx_year <dbl>,
## # menopause_status <chr>, birth_days_to <dbl>, vital_status <chr>,
## # tumor_status <chr>, last_contact_days_to <dbl>, death_days_to <dbl>,
## # cause_of_death <chr>, new_tumor_event_type <chr>,
## # new_tumor_event_site <chr>, new_tumor_event_site_other <chr>,
## # new_tumor_event_dx_days_to <dbl>, treatment_outcome_first_course <chr>,
## # margin_status <chr>, residual_tumor <lgl>, OS <dbl>, OS.time <dbl>,
## # DSS <dbl>, DSS.time <dbl>, DFI <dbl>, DFI.time <dbl>, PFI <dbl>,
## # PFI.time <dbl>, Redaction <chr>
##
## COAD LUAD LUSC SKCM
## 459 522 504 470
Load the results of cell type fraction, and then merage them with clinical information. Save the information for each cancer type into a list named ctf (cell type fractions).
cdr$patient_id = substring(cdr$bcr_patient_barcode, first=9)
ctf = list()
for(c1 in cancer_types){
cat("\n--------------------------------------------------------------\n")
cat(c1)
cat("\n--------------------------------------------------------------\n")
load(sprintf("deconv_expr_%s.RData", c1))
load(sprintf("deconv_methy_%s.RData", c1))
deconv_expr = list(as.name(sprintf("deconv_expr_%s", c1)))
deconv_meth = list(as.name(sprintf("rho_%s", c1)))
cat("\ncell type fraction estimates from expression\n")
print(do.call(dim, deconv_expr))
print(eval(deconv_expr[[1]])[1:2,])
cat("\ncell type fraction estimates from methylation\n")
print(do.call(dim, deconv_meth))
print(eval(deconv_meth[[1]])[1,,])
stopifnot(all(do.call(rownames, deconv_expr) ==
do.call(dimnames, deconv_meth)[[1]]))
ct = eval(deconv_expr[[1]])
colnames(ct) = paste0(colnames(ct), ".E")
ct = cbind(ct, eval(deconv_meth[[1]])[,,"EMeth"])
dim(ct)
ct[1:2,]
ct[which(ct < 5e-3)] = 5e-3
stopifnot(all(rownames(ct) %in% cdr$patient_id))
ct = as.data.frame(ct)
ct$patient_id = rownames(ct)
ct = merge(ct, cdr, by="patient_id")
dim(ct)
ct[1:2,]
ctf[[c1]] = ct
}##
## --------------------------------------------------------------
## COAD
## --------------------------------------------------------------
##
## cell type fraction estimates from expression
## [1] 185 7
## CD4T CD8T Monocyte B NK Neutrophil
## 5656 0.07627453 0.01670232 0.02561258 0.04381193 0.01982880 0.00652889
## 6781 0.06005796 0.12891577 0.10364729 0.07942259 0.05740941 0.04947972
## Treg
## 5656 0.01124095
## 6781 0.02106727
##
## cell type fraction estimates from methylation
## [1] 185 7 5
## EMeth svr ls rls qp
## CD4T 0.026415214 0.0319012555 0.00000000 0.000000000 2.000000e-01
## CD8T 0.003385372 0.0409917716 0.12346010 0.119316714 -8.686388e-18
## Monocyte 0.049698012 0.0576861975 0.03145094 0.032486828 1.205043e-16
## B 0.024464355 0.0532250575 0.02314771 0.024156154 0.000000e+00
## NK 0.010294409 0.0123506692 0.00000000 0.001562089 1.524040e-16
## Neutrophil 0.012566427 0.0029683220 0.02194124 0.022478216 -1.019300e-16
## Treg 0.073176210 0.0008767268 0.00000000 0.000000000 -3.707308e-17
##
## --------------------------------------------------------------
## LUAD
## --------------------------------------------------------------
##
## cell type fraction estimates from expression
## [1] 386 7
## CD4T CD8T Monocyte B NK Neutrophil Treg
## 2666 0.2116564 0.02985346 0.04384860 0.1298613 0.02478026 0.000000000 0
## 3918 0.2976973 0.00000000 0.08549598 0.1191329 0.03891238 0.008761388 0
##
## cell type fraction estimates from methylation
## [1] 386 7 5
## EMeth svr ls rls qp
## CD4T 0.04277126 0.051700419 0.00000000 0.00000000 4.400000e-01
## CD8T 0.07050939 0.162606264 0.26055267 0.27921028 1.291454e-16
## Monocyte 0.03198820 0.060022761 0.04819152 0.03858709 2.720060e-17
## B 0.07414185 0.095191019 0.05165730 0.05069063 0.000000e+00
## NK 0.03264954 0.009494585 0.00000000 0.00000000 1.012392e-18
## Neutrophil 0.07449514 0.060984952 0.07959851 0.07151200 2.542433e-17
## Treg 0.11344463 0.000000000 0.00000000 0.00000000 4.502091e-18
##
## --------------------------------------------------------------
## LUSC
## --------------------------------------------------------------
##
## cell type fraction estimates from expression
## [1] 296 7
## CD4T CD8T Monocyte B NK Neutrophil
## 6546 0.1785366 0.07040708 0.08526741 0.21031966 0.03650193 0.001964585
## A50Z 0.1267777 0.00000000 0.06197406 0.06561747 0.09088046 0.276548787
## Treg
## 6546 0.007002724
## A50Z 0.018201514
##
## cell type fraction estimates from methylation
## [1] 296 7 5
## EMeth svr ls rls qp
## CD4T -2.297196e-17 0.164670429 0.000000000 0.008166193 3.877593e-01
## CD8T 1.135327e-01 0.082647787 0.294630259 0.301378010 3.105060e-02
## Monocyte 1.492672e-01 0.156557168 0.155026869 0.150076818 1.233630e-02
## B 1.574076e-01 0.133672318 0.132459221 0.128402044 1.588538e-01
## NK 1.006683e-01 0.050492578 0.004148921 0.000000000 9.969011e-20
## Neutrophil 6.912424e-02 0.000000000 0.002147771 0.001976935 3.187354e-17
## Treg 0.000000e+00 0.001959721 0.001586959 0.000000000 0.000000e+00
##
## --------------------------------------------------------------
## SKCM
## --------------------------------------------------------------
##
## cell type fraction estimates from expression
## [1] 391 7
## CD4T CD8T Monocyte B NK Neutrophil
## A51H 0.2188935 0.03499094 0.05367095 0.545323628 0.03791312 0.00000000
## A728 0.1688186 0.24422901 0.09891862 0.009328079 0.03907109 0.07365859
## Treg
## A51H 0.009207837
## A728 0.075976001
##
## cell type fraction estimates from methylation
## [1] 391 7 5
## EMeth svr ls rls qp
## CD4T 4.643562e-01 0.384154045 0.35310646 0.364675454 4.895262e-01
## CD8T -1.163978e-17 0.000000000 0.00000000 0.000000000 5.345828e-19
## Monocyte -5.503769e-19 0.033600316 0.04366372 0.035335965 2.338829e-18
## B 4.356438e-01 0.366820183 0.38580363 0.396619361 4.104738e-01
## NK -1.734480e-19 0.009108515 0.01178413 0.001574085 1.531097e-17
## Neutrophil -4.341269e-18 0.000000000 0.00000000 0.000000000 3.956396e-20
## Treg -1.391424e-18 0.106316941 0.10564205 0.101795135 0.000000e+00
This function cox_summary summarizes the association between cell type composition and survival time.
cox_summary <- function(cell_type1, ct, surv_var){
fm1 = sprintf("Surv(%s.time, %s) ~", surv_var, surv_var)
fm1 = paste(fm1, "age_at_initial_pathologic_diagnosis + gender +")
fm2 = as.formula(paste(fm1, cell_type1))
res.cox = coxph(fm2, data = ct)
x = summary(res.cox)
k = nrow(x$coefficients)
pval = signif(x$coefficients[k,"Pr(>|z|)"], digits=2)
beta = signif(x$coefficients[k, "coef"], digits=3)
HR = signif(x$coefficients[k, "exp(coef)"], digits=3)
HR.confint.lower = sprintf("%.2e", x$conf.int[k, "lower .95"])
HR.confint.upper = sprintf("%.2e", x$conf.int[k, "upper .95"])
HR = paste0(HR, " (", HR.confint.lower, "--", HR.confint.upper, ")")
res = c(beta, HR, pval)
names(res) = c("beta", "HR (95% CI for HR)", "p.value")
return(res)
}Check the associations between cell type fractions and disease-specific survival.
for(c1 in cancer_types){
cat("\n--------------------------------------------------------------\n")
cat(c1)
cat("\n--------------------------------------------------------------\n")
ct = ctf[[c1]]
cell_types = names(ct)[2:15]
for(ctype1 in cell_types){
ct[[ctype1]] = log2(ct[[ctype1]])
}
df1 = t(sapply(cell_types, cox_summary, ct=ct, surv_var="DSS"))
print(as.data.frame(df1))
}##
## --------------------------------------------------------------
## COAD
## --------------------------------------------------------------
## beta HR (95% CI for HR) p.value
## CD4T.E 0.0225 1.02 (5.66e-01--1.85e+00) 0.94
## CD8T.E -0.0135 0.987 (6.65e-01--1.46e+00) 0.95
## Monocyte.E -0.0214 0.979 (6.00e-01--1.60e+00) 0.93
## B.E 0.15 1.16 (6.76e-01--2.00e+00) 0.59
## NK.E -0.204 0.815 (5.18e-01--1.28e+00) 0.38
## Neutrophil.E 0.0544 1.06 (7.25e-01--1.54e+00) 0.78
## Treg.E -0.00836 0.992 (6.42e-01--1.53e+00) 0.97
## CD4T -0.0674 0.935 (6.89e-01--1.27e+00) 0.67
## CD8T 0.0362 1.04 (8.10e-01--1.33e+00) 0.77
## Monocyte -0.129 0.879 (6.28e-01--1.23e+00) 0.45
## B 0.0833 1.09 (7.85e-01--1.50e+00) 0.62
## NK -0.13 0.878 (6.16e-01--1.25e+00) 0.47
## Neutrophil -0.0213 0.979 (7.32e-01--1.31e+00) 0.89
## Treg 0.0574 1.06 (7.79e-01--1.44e+00) 0.71
##
## --------------------------------------------------------------
## LUAD
## --------------------------------------------------------------
## beta HR (95% CI for HR) p.value
## CD4T.E 0.0165 1.02 (7.58e-01--1.36e+00) 0.91
## CD8T.E 0.0442 1.05 (8.82e-01--1.24e+00) 0.61
## Monocyte.E 0.0425 1.04 (8.12e-01--1.34e+00) 0.74
## B.E -0.173 0.841 (6.71e-01--1.05e+00) 0.13
## NK.E 0.162 1.18 (9.13e-01--1.51e+00) 0.21
## Neutrophil.E 0.0571 1.06 (8.50e-01--1.32e+00) 0.61
## Treg.E -0.122 0.885 (7.19e-01--1.09e+00) 0.25
## CD4T -0.0412 0.96 (8.56e-01--1.08e+00) 0.48
## CD8T -0.0285 0.972 (8.89e-01--1.06e+00) 0.53
## Monocyte -0.0412 0.96 (7.98e-01--1.15e+00) 0.66
## B -0.101 0.904 (7.85e-01--1.04e+00) 0.16
## NK 0.149 1.16 (9.75e-01--1.38e+00) 0.095
## Neutrophil -0.00564 0.994 (8.69e-01--1.14e+00) 0.93
## Treg 0.00366 1 (8.98e-01--1.12e+00) 0.95
##
## --------------------------------------------------------------
## LUSC
## --------------------------------------------------------------
## beta HR (95% CI for HR) p.value
## CD4T.E -0.198 0.82 (6.29e-01--1.07e+00) 0.15
## CD8T.E -0.0374 0.963 (7.94e-01--1.17e+00) 0.7
## Monocyte.E -0.126 0.881 (6.62e-01--1.17e+00) 0.39
## B.E -0.124 0.883 (7.13e-01--1.09e+00) 0.25
## NK.E -0.208 0.813 (6.14e-01--1.07e+00) 0.15
## Neutrophil.E 0.0935 1.1 (8.74e-01--1.38e+00) 0.42
## Treg.E -0.114 0.892 (6.62e-01--1.20e+00) 0.45
## CD4T 0.0367 1.04 (9.06e-01--1.19e+00) 0.6
## CD8T -0.0222 0.978 (8.75e-01--1.09e+00) 0.7
## Monocyte 0.0116 1.01 (8.47e-01--1.21e+00) 0.9
## B -0.181 0.834 (7.22e-01--9.64e-01) 0.014
## NK -0.0642 0.938 (7.84e-01--1.12e+00) 0.48
## Neutrophil -0.0382 0.963 (8.36e-01--1.11e+00) 0.59
## Treg 0.0591 1.06 (9.32e-01--1.21e+00) 0.37
##
## --------------------------------------------------------------
## SKCM
## --------------------------------------------------------------
## beta HR (95% CI for HR) p.value
## CD4T.E -0.163 0.85 (7.65e-01--9.44e-01) 0.0024
## CD8T.E -0.145 0.865 (7.95e-01--9.40e-01) 0.00068
## Monocyte.E -0.0737 0.929 (8.22e-01--1.05e+00) 0.24
## B.E -0.103 0.902 (8.22e-01--9.90e-01) 0.029
## NK.E -0.154 0.857 (7.53e-01--9.76e-01) 0.02
## Neutrophil.E 0.0205 1.02 (8.37e-01--1.24e+00) 0.84
## Treg.E -0.133 0.875 (7.76e-01--9.87e-01) 0.03
## CD4T -0.0164 0.984 (9.09e-01--1.06e+00) 0.68
## CD8T -0.13 0.878 (8.21e-01--9.39e-01) 0.00015
## Monocyte -0.0343 0.966 (8.90e-01--1.05e+00) 0.41
## B -0.143 0.867 (7.95e-01--9.45e-01) 0.0011
## NK -0.0887 0.915 (8.19e-01--1.02e+00) 0.12
## Neutrophil 0.113 1.12 (1.02e+00--1.23e+00) 0.015
## Treg 0.116 1.12 (1.03e+00--1.23e+00) 0.013
Check the associations between disease-specific survival and relative cell type fractions with respect to CD8 T cells.
for(c1 in cancer_types){
cat("\n--------------------------------------------------------------\n")
cat(c1)
cat("\n--------------------------------------------------------------\n")
ct = ctf[[c1]]
cell_types = setdiff(names(ct)[2:15], c("CD8T.E", "CD8T"))
for(ctype1 in cell_types){
ctype1r = paste0(ctype1, "_lr")
ctype2 = "CD8T"
if(grepl(".E", ctype1, fixed=TRUE)){ ctype2 = "CD8T.E"}
# cat(ctype1, ctype2, "\n")
ct[[ctype1r]] = log2(ct[[ctype1]]/ct[[ctype2]])
}
cell_types = paste0(cell_types, "_lr")
df1 = t(sapply(cell_types, cox_summary, ct=ct, surv_var="DSS"))
print(as.data.frame(df1))
}##
## --------------------------------------------------------------
## COAD
## --------------------------------------------------------------
## beta HR (95% CI for HR) p.value
## CD4T.E_lr 0.0275 1.03 (6.70e-01--1.58e+00) 0.9
## Monocyte.E_lr -0.00068 0.999 (5.41e-01--1.85e+00) 1
## B.E_lr 0.211 1.24 (6.74e-01--2.26e+00) 0.49
## NK.E_lr -0.219 0.803 (4.90e-01--1.32e+00) 0.38
## Neutrophil.E_lr 0.0573 1.06 (7.45e-01--1.51e+00) 0.75
## Treg.E_lr 0.00902 1.01 (6.37e-01--1.60e+00) 0.97
## CD4T_lr -0.104 0.901 (6.77e-01--1.20e+00) 0.47
## Monocyte_lr -0.128 0.88 (6.62e-01--1.17e+00) 0.38
## B_lr 0.0318 1.03 (7.17e-01--1.49e+00) 0.86
## NK_lr -0.0851 0.918 (7.28e-01--1.16e+00) 0.47
## Neutrophil_lr -0.0285 0.972 (8.09e-01--1.17e+00) 0.76
## Treg_lr 0.00136 1 (8.19e-01--1.22e+00) 0.99
##
## --------------------------------------------------------------
## LUAD
## --------------------------------------------------------------
## beta HR (95% CI for HR) p.value
## CD4T.E_lr -0.0394 0.961 (8.10e-01--1.14e+00) 0.65
## Monocyte.E_lr -0.0196 0.981 (8.43e-01--1.14e+00) 0.8
## B.E_lr -0.127 0.881 (7.45e-01--1.04e+00) 0.14
## NK.E_lr 0.024 1.02 (8.83e-01--1.19e+00) 0.75
## Neutrophil.E_lr -0.00758 0.992 (8.68e-01--1.13e+00) 0.91
## Treg.E_lr -0.118 0.888 (7.55e-01--1.05e+00) 0.16
## CD4T_lr 0.00119 1 (9.45e-01--1.06e+00) 0.97
## Monocyte_lr 0.0219 1.02 (9.29e-01--1.12e+00) 0.65
## B_lr -0.0152 0.985 (8.74e-01--1.11e+00) 0.8
## NK_lr 0.0625 1.06 (9.78e-01--1.16e+00) 0.15
## Neutrophil_lr 0.0184 1.02 (9.45e-01--1.10e+00) 0.63
## Treg_lr 0.0114 1.01 (9.58e-01--1.07e+00) 0.68
##
## --------------------------------------------------------------
## LUSC
## --------------------------------------------------------------
## beta HR (95% CI for HR) p.value
## CD4T.E_lr -0.054 0.947 (7.76e-01--1.16e+00) 0.6
## Monocyte.E_lr -0.0136 0.986 (8.26e-01--1.18e+00) 0.88
## B.E_lr -0.0392 0.962 (8.19e-01--1.13e+00) 0.63
## NK.E_lr -0.04 0.961 (8.12e-01--1.14e+00) 0.64
## Neutrophil.E_lr 0.0621 1.06 (9.15e-01--1.24e+00) 0.42
## Treg.E_lr -0.0143 0.986 (8.14e-01--1.19e+00) 0.88
## CD4T_lr 0.0229 1.02 (9.46e-01--1.11e+00) 0.57
## Monocyte_lr 0.0292 1.03 (9.17e-01--1.16e+00) 0.62
## B_lr -0.138 0.871 (7.45e-01--1.02e+00) 0.085
## NK_lr -0.00197 0.998 (9.01e-01--1.11e+00) 0.97
## Neutrophil_lr -0.00117 0.999 (9.19e-01--1.09e+00) 0.98
## Treg_lr 0.0251 1.03 (9.57e-01--1.10e+00) 0.48
##
## --------------------------------------------------------------
## SKCM
## --------------------------------------------------------------
## beta HR (95% CI for HR) p.value
## CD4T.E_lr 0.0775 1.08 (9.79e-01--1.19e+00) 0.12
## Monocyte.E_lr 0.145 1.16 (1.06e+00--1.27e+00) 0.0019
## B.E_lr 0.0742 1.08 (9.84e-01--1.18e+00) 0.11
## NK.E_lr 0.128 1.14 (1.03e+00--1.26e+00) 0.014
## Neutrophil.E_lr 0.126 1.13 (1.05e+00--1.22e+00) 0.001
## Treg.E_lr 0.157 1.17 (1.04e+00--1.31e+00) 0.0087
## CD4T_lr 0.0916 1.1 (1.03e+00--1.16e+00) 0.0023
## Monocyte_lr 0.117 1.12 (1.05e+00--1.20e+00) 0.00089
## B_lr 0.0575 1.06 (9.80e-01--1.14e+00) 0.14
## NK_lr 0.0761 1.08 (1.02e+00--1.14e+00) 0.011
## Neutrophil_lr 0.094 1.1 (1.05e+00--1.15e+00) 0.00011
## Treg_lr 0.0919 1.1 (1.05e+00--1.15e+00) 1e-04
Generate plots of survival curves with respect to cell type fractions for SKCM.
ct = ctf[["SKCM"]]
cell_types = c("CD4T", "CD8T", "Monocyte", "B", "NK", "Neutrophil", "Treg")
glist = list()
for(ctype1 in cell_types){
ctype1E = paste0(ctype1, ".E")
ab = rep("Low", nrow(ct))
ab[which(ct[[ctype1E]] > median(ct[[ctype1E]]))] = "High"
fit1 = survfit(Surv(DSS.time, DSS) ~ ab, data=ct)
g1 = ggsurvplot(fit1, data = ct, risk.table = TRUE,
pval=TRUE, pval.method=TRUE,
title=paste(ctype1, ".E"))
ab = rep("Low", nrow(ct))
ab[which(ct[[ctype1]] > median(ct[[ctype1]]))] = "High"
fit2 = survfit(Surv(DSS.time, DSS) ~ ab, data=ct)
g2 = ggsurvplot(fit2, data = ct, risk.table = TRUE,
pval=TRUE, pval.method=TRUE,
title=ctype1)
print(g1)
print(g2)
}Generate plots of survival curves with respect to relative cell type fractions for SKCM.
ct = ctf[["SKCM"]]
cell_types = c("CD4T", "Monocyte", "B", "NK", "Neutrophil", "Treg")
glist = list()
for(ctype1 in cell_types){
ctype1E = paste0(ctype1, ".E")
ct.lrE = log2(ct[[ctype1E]]/ct[["CD8T.E"]])
rel.ab = rep("Low", length(ct.lrE))
rel.ab[which(ct.lrE > median(ct.lrE))] = "High"
fit1 = survfit(Surv(DSS.time, DSS) ~ rel.ab, data=ct)
g1 = ggsurvplot(fit1, data = ct, risk.table = TRUE,
pval=TRUE, pval.method=TRUE,
title=paste(ctype1, ".E"))
ct.lr = log2(ct[[ctype1]]/ct[["CD8T"]])
rel.ab = rep("Low", length(ct.lr))
rel.ab[which(ct.lr > median(ct.lr))] = "High"
fit2 = survfit(Surv(DSS.time, DSS) ~ rel.ab, data=ct)
g2 = ggsurvplot(fit2, data = ct, risk.table = TRUE,
pval=TRUE, pval.method=TRUE,
title=ctype1)
print(g1)
print(g2)
}First load mutation burden data
scL = list()
for(c1 in cancer_types){
sc = readRDS(sprintf("clinical_data/%s_somatic_clinic.rds", c1))
dim(sc)
names(sc)
sc = sc[,-(23:32)]
dim(sc)
sc[1:2,]
names(sc)[1] = "bcr_patient_barcode"
ct = ctf[[c1]]
dim(ct)
ct[1:2,]
sc = merge(sc, ct, by="bcr_patient_barcode")
dim(sc)
sc[1:2,]
scL[[c1]] = sc
}
lapply(ctf, dim)## $COAD
## [1] 185 49
##
## $LUAD
## [1] 386 49
##
## $LUSC
## [1] 296 49
##
## $SKCM
## [1] 391 49
## $COAD
## [1] 177 76
##
## $LUAD
## [1] 349 76
##
## $LUSC
## [1] 274 76
##
## $SKCM
## [1] 357 76
Next assess the associations between mutation load and cell type fractions
for(c1 in cancer_types){
cat("\n--------------------------------------------------------------\n")
cat(c1)
cat("\n--------------------------------------------------------------\n")
sc = scL[[c1]]
mb1 = sc$raw_MB_hg38_SNV + sc$raw_MB_hg38_INDEL
mb2 = sc$raw_MB_hg38_SNV*sc$prop_clonal + sc$raw_MB_hg38_INDEL
mb = cbind(mb1, mb2)
mb[mb < 10] = NA
mb = log10(mb)
cf = sc[,30:43]
cr1 = cor(mb, cf, method="spearman", use="pairwise.complete.obs")
pv1 = matrix(NA, nrow=2, ncol=14)
for(j in 1:2){
for(k in 1:14){
cjk = cor.test(mb[,j], cf[,k], method="spearman")
pv1[j,k] = cjk$p.value
}
}
cp = cbind(round(t(cr1),3), signif(t(pv1),2))
colnames(cp)[3:4] = c("mb1.pval", "mb2.pval")
print(cp)
crE = cr1[,1:7]
colnames(crE) = gsub(".E", "", colnames(crE))
gc1 = ggcorrplot(crE, lab = TRUE, title=paste0(c1, "-Expr"))
gc2 = ggcorrplot(cr1[,8:14], lab = TRUE, title=paste0(c1, "-Meth"))
gga = ggarrange(gc1, gc2 + rremove("legend"), ncol = 2, nrow = 1)
print(gga)
}##
## --------------------------------------------------------------
## COAD
## --------------------------------------------------------------
## mb1 mb2 mb1.pval mb2.pval
## CD4T.E -0.133 -0.178 0.0790 0.018
## CD8T.E 0.171 0.131 0.0230 0.081
## Monocyte.E 0.065 0.014 0.3900 0.850
## B.E -0.041 -0.104 0.5900 0.170
## NK.E 0.123 0.094 0.1000 0.210
## Neutrophil.E 0.048 0.049 0.5200 0.510
## Treg.E 0.100 0.011 0.1800 0.880
## CD4T -0.130 -0.162 0.0840 0.031
## CD8T 0.237 0.210 0.0015 0.005
## Monocyte -0.002 -0.073 0.9800 0.330
## B 0.024 -0.042 0.7500 0.580
## NK 0.013 -0.009 0.8600 0.910
## Neutrophil 0.002 -0.004 0.9800 0.960
## Treg -0.152 -0.154 0.0430 0.041
##
## --------------------------------------------------------------
## LUAD
## --------------------------------------------------------------
## mb1 mb2 mb1.pval mb2.pval
## CD4T.E -0.061 -0.057 2.6e-01 3.0e-01
## CD8T.E 0.139 0.117 9.6e-03 3.1e-02
## Monocyte.E -0.141 -0.113 8.5e-03 3.8e-02
## B.E -0.010 -0.029 8.6e-01 5.9e-01
## NK.E 0.085 0.076 1.1e-01 1.7e-01
## Neutrophil.E 0.005 -0.017 9.3e-01 7.5e-01
## Treg.E -0.062 -0.051 2.5e-01 3.5e-01
## CD4T -0.340 -0.276 7.0e-11 2.6e-07
## CD8T 0.151 0.132 4.9e-03 1.5e-02
## Monocyte -0.009 0.016 8.7e-01 7.8e-01
## B 0.010 -0.003 8.6e-01 9.6e-01
## NK 0.067 0.069 2.1e-01 2.1e-01
## Neutrophil -0.103 -0.119 5.5e-02 2.8e-02
## Treg -0.159 -0.163 3.0e-03 2.7e-03
##
## --------------------------------------------------------------
## LUSC
## --------------------------------------------------------------
## mb1 mb2 mb1.pval mb2.pval
## CD4T.E -0.211 -0.237 4.6e-04 8.2e-05
## CD8T.E -0.019 -0.094 7.6e-01 1.2e-01
## Monocyte.E -0.256 -0.244 1.9e-05 5.1e-05
## B.E -0.171 -0.200 4.7e-03 9.3e-04
## NK.E -0.072 -0.093 2.4e-01 1.3e-01
## Neutrophil.E 0.018 0.046 7.7e-01 4.5e-01
## Treg.E 0.014 -0.058 8.1e-01 3.4e-01
## CD4T 0.107 0.130 7.8e-02 3.2e-02
## CD8T -0.181 -0.271 2.7e-03 5.9e-06
## Monocyte -0.132 -0.113 2.9e-02 6.2e-02
## B -0.193 -0.249 1.4e-03 3.4e-05
## NK -0.089 -0.080 1.4e-01 1.9e-01
## Neutrophil -0.103 -0.083 9.0e-02 1.7e-01
## Treg 0.180 0.214 2.8e-03 3.9e-04
##
## --------------------------------------------------------------
## SKCM
## --------------------------------------------------------------
## mb1 mb2 mb1.pval mb2.pval
## CD4T.E 0.064 0.028 0.230 0.600
## CD8T.E 0.102 0.045 0.056 0.400
## Monocyte.E -0.031 -0.042 0.560 0.440
## B.E 0.042 -0.013 0.440 0.810
## NK.E 0.026 0.036 0.620 0.510
## Neutrophil.E 0.016 0.034 0.760 0.530
## Treg.E 0.050 0.008 0.350 0.890
## CD4T -0.069 -0.076 0.190 0.160
## CD8T 0.132 0.078 0.013 0.150
## Monocyte 0.012 0.023 0.820 0.670
## B 0.051 0.010 0.340 0.850
## NK 0.104 0.124 0.051 0.021
## Neutrophil -0.055 0.004 0.300 0.940
## Treg -0.101 -0.101 0.058 0.061
Illustrate cell type fraction estimates
## $COAD
## [1] 177 76
##
## $LUAD
## [1] 349 76
##
## $LUSC
## [1] 274 76
##
## $SKCM
## [1] 357 76
## [1] 1157 76
## bcr_patient_barcode SMASH_S_hg38 SMASH_oE_hg38 SMASH_wE_hg38 SMASH_oNE_hg38
## 1 TCGA-A6-2671 3 1.06 0.9274599 0.5531430
## 2 TCGA-A6-2675 2 0.68 0.7213201 0.6600493
## SMASH_wNE_hg38 AscatPurity AscatPloidy raw_MB_hg38_SNV raw_MB_hg38_INDEL
## 1 0.4846527 0.68 3.609337 49 7
## 2 0.6585205 0.49 3.351309 72 2
## IDH_CNV_status_hg38 tCN_burden tCN_burden_ap tumor_type.x num_clonal
## 1 IDH wild type 2.228662 1.0712969 COAD 39
## 2 IDH wild type 1.572784 0.8041944 COAD 46
## num_subclonal prop_clonal gender.x age tumor_type.y
## 1 10 0.7959184 MALE 85 Colon
## 2 26 0.6388889 MALE 78 Colon
## initial_pathologic_diagnosis_method histological_type.x
## 1 <NA> Colon Adenocarcinoma
## 2 <NA> Colon Adenocarcinoma
## yr_of_tobacco_smoking_onset pathologic_T stage Time Delta
## 1 <NA> T3 IV 1331.01 1
## 2 <NA> T3 II 1321.01 0
## radiation_therapy patient_id CD4T.E CD8T.E Monocyte.E B.E
## 1 NO 2671 0.07669056 0.02889390 0.03723064 0.05887684
## 2 NO 2675 0.14310630 0.05848576 0.09817728 0.14408533
## NK.E Neutrophil.E Treg.E CD4T CD8T Monocyte
## 1 0.01678819 0.02455244 0.02696743 0.04567977 0.00500000 0.06194856
## 2 0.03417972 0.04044031 0.06152530 0.17356475 0.09491321 0.07218969
## B NK Neutrophil Treg ...1 type
## 1 0.02762686 0.00500000 0.05317235 0.08157246 1961 COAD
## 2 0.06759522 0.03352732 0.03273829 0.10547152 1964 COAD
## age_at_initial_pathologic_diagnosis gender.y race
## 1 85 MALE WHITE
## 2 78 MALE WHITE
## ajcc_pathologic_tumor_stage clinical_stage histological_type.y
## 1 Stage IV [Not Applicable] Colon Adenocarcinoma
## 2 Stage IIA [Not Applicable] Colon Adenocarcinoma
## histological_grade initial_pathologic_dx_year menopause_status birth_days_to
## 1 [Not Available] 2009 [Not Available] -31329
## 2 [Not Available] 2009 [Not Available] -28813
## vital_status tumor_status last_contact_days_to death_days_to cause_of_death
## 1 Dead WITH TUMOR NA 1331 [Not Available]
## 2 Alive TUMOR FREE 1321 NA [Not Available]
## new_tumor_event_type new_tumor_event_site new_tumor_event_site_other
## 1 <NA> <NA> <NA>
## 2 <NA> <NA> <NA>
## new_tumor_event_dx_days_to treatment_outcome_first_course margin_status
## 1 535 [Not Available] <NA>
## 2 NA Complete Remission/Response <NA>
## residual_tumor OS OS.time DSS DSS.time DFI DFI.time PFI PFI.time Redaction
## 1 NA 1 1331 1 1331 NA NA 1 535 <NA>
## 2 NA 0 1321 0 1321 0 1321 0 1321 <NA>
## [1] "bcr_patient_barcode" "SMASH_S_hg38"
## [3] "SMASH_oE_hg38" "SMASH_wE_hg38"
## [5] "SMASH_oNE_hg38" "SMASH_wNE_hg38"
## [7] "AscatPurity" "AscatPloidy"
## [9] "raw_MB_hg38_SNV" "raw_MB_hg38_INDEL"
## [11] "IDH_CNV_status_hg38" "tCN_burden"
## [13] "tCN_burden_ap" "tumor_type.x"
## [15] "num_clonal" "num_subclonal"
## [17] "prop_clonal" "gender.x"
## [19] "age" "tumor_type.y"
## [21] "initial_pathologic_diagnosis_method" "histological_type.x"
## [23] "yr_of_tobacco_smoking_onset" "pathologic_T"
## [25] "stage" "Time"
## [27] "Delta" "radiation_therapy"
## [29] "patient_id" "CD4T.E"
## [31] "CD8T.E" "Monocyte.E"
## [33] "B.E" "NK.E"
## [35] "Neutrophil.E" "Treg.E"
## [37] "CD4T" "CD8T"
## [39] "Monocyte" "B"
## [41] "NK" "Neutrophil"
## [43] "Treg" "...1"
## [45] "type" "age_at_initial_pathologic_diagnosis"
## [47] "gender.y" "race"
## [49] "ajcc_pathologic_tumor_stage" "clinical_stage"
## [51] "histological_type.y" "histological_grade"
## [53] "initial_pathologic_dx_year" "menopause_status"
## [55] "birth_days_to" "vital_status"
## [57] "tumor_status" "last_contact_days_to"
## [59] "death_days_to" "cause_of_death"
## [61] "new_tumor_event_type" "new_tumor_event_site"
## [63] "new_tumor_event_site_other" "new_tumor_event_dx_days_to"
## [65] "treatment_outcome_first_course" "margin_status"
## [67] "residual_tumor" "OS"
## [69] "OS.time" "DSS"
## [71] "DSS.time" "DFI"
## [73] "DFI.time" "PFI"
## [75] "PFI.time" "Redaction"
names(scDf)[14] = "cancer_type"
scE = scDf[,c(14,30:36)]
rhos = scE %>% pivot_longer(-cancer_type, names_to=c("cell_type"))
ggplot(rhos, aes(x=cell_type, y=value, fill=cancer_type)) +
geom_boxplot() + labs(title="Cell type fraction by gene expression")scM = scDf[,c(14,37:43)]
rhos = scM %>% pivot_longer(-cancer_type, names_to=c("cell_type"))
ggplot(rhos, aes(x=cell_type, y=value, fill=cancer_type)) +
geom_boxplot() + labs(title="Cell type fraction by DNA methylation")Compare cell type fraction estimates between gene expression and DNA methylation
cell_types = c("B", "CD4T", "CD8T", "Monocyte", "NK", "Neutrophil", "Treg")
for(cc1 in cell_types){
g1 = ggplot(scDf, aes_string(x=paste0(cc1, ".E"), y=cc1, color="cancer_type")) +
geom_point() + geom_smooth(method=lm) + facet_grid(~ cancer_type) +
theme(legend.position = "none")
print(g1)
}Compare cell type fraction estimates vs. mutation load
cell_types = c("B", "CD4T", "CD8T", "Monocyte", "NK", "Neutrophil", "Treg")
mb = scDf$raw_MB_hg38_SNV + scDf$raw_MB_hg38_INDEL
mb[which(mb < 10)] = NA
scDf$mb = log10(mb)
for(cc1 in cell_types){
g1 = ggplot(scDf, aes_string(x="mb", y=paste0(cc1, ".E"), color="cancer_type")) +
geom_point() + geom_smooth(method=lm) + facet_grid(~ cancer_type) +
theme(legend.position = "none")
print(g1)
g1 = ggplot(scDf, aes_string(x="mb", y=cc1, color="cancer_type")) +
geom_point() + geom_smooth(method=lm) + facet_grid(~ cancer_type) +
theme(legend.position = "none")
print(g1)
}First define the hypermutation subgroup with larger number of mutations.
coad = scL[["COAD"]]
ggplot(coad, aes(x=log10(raw_MB_hg38_SNV)))+
geom_histogram(color="darkblue", fill="lightblue") +
geom_vline(xintercept = log10(400))Next compare the cell type fractions between hypermuted and non-hypermutated groups.
ctypes = c("CD8T.E", "CD8T", "Treg.E", "Treg")
gl = list()
for(ct1 in ctypes){
w1 = wilcox.test(coad[[ct1]]~ coad$hyper_mut)
gl[[ct1]] = ggplot(coad, aes_string(x="hyper_mut", y=ct1, fill="hyper_mut")) +
theme(legend.position = "none") + geom_violin(trim=FALSE) +
geom_boxplot(width=0.1, fill="white") +
labs(title=sprintf("Wilcox p=%.1e", w1$p.value), x="Hypermutation status")
}
ggarrange(gl[[1]], gl[[2]], gl[[3]], gl[[4]], nrow = 2, ncol = 2)## R version 3.6.2 (2019-12-12)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS Catalina 10.15.5
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] tidyr_1.0.2 ggcorrplot_0.1.3 stargazer_5.2.2
## [4] readxl_1.3.1 survminer_0.4.8 survival_3.1-8
## [7] ggpointdensity_0.1.0 ggpubr_0.2.5 magrittr_1.5
## [10] ggplot2_3.3.1 data.table_1.12.8
##
## loaded via a namespace (and not attached):
## [1] zoo_1.8-7 tidyselect_1.0.0 xfun_0.12 reshape2_1.4.3
## [5] purrr_0.3.3 splines_3.6.2 lattice_0.20-38 colorspace_1.4-1
## [9] vctrs_0.3.0 generics_0.0.2 htmltools_0.4.0 mgcv_1.8-31
## [13] yaml_2.2.1 utf8_1.1.4 survMisc_0.5.5 rlang_0.4.6
## [17] pillar_1.4.3 glue_1.3.1 withr_2.1.2 plyr_1.8.5
## [21] lifecycle_0.2.0 stringr_1.4.0 munsell_0.5.0 ggsignif_0.6.0
## [25] gtable_0.3.0 cellranger_1.1.0 evaluate_0.14 labeling_0.3
## [29] knitr_1.28 fansi_0.4.1 broom_0.5.6 Rcpp_1.0.3
## [33] xtable_1.8-4 scales_1.1.0 backports_1.1.5 farver_2.0.3
## [37] km.ci_0.5-2 gridExtra_2.3 digest_0.6.23 stringi_1.4.5
## [41] dplyr_0.8.4 KMsurv_0.1-5 cowplot_1.0.0 grid_3.6.2
## [45] cli_2.0.1 tools_3.6.2 tibble_3.0.1 crayon_1.3.4
## [49] pkgconfig_2.0.3 ellipsis_0.3.0 Matrix_1.2-18 assertthat_0.2.1
## [53] rmarkdown_2.1 R6_2.4.1 nlme_3.1-144 compiler_3.6.2